#!/usr/bin/env python 
# -*- coding: utf-8 -*-

"""
@author: zyx
@since: 2022/2/7 10:20
@file: c06_代理批量获取.py
"""

# URL: https://www.kuaidaili.com/free
# Requirement: parse the IP and port from the first 5 pages and store them in a file

import time

from bs4 import BeautifulSoup
import requests

# Browser-like UA so the site serves the normal listing page.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36'
}


def parse_proxies(page_text):
    """Extract (ip, port) pairs from one free-proxy listing page.

    :param page_text: raw HTML of a kuaidaili free-proxy page; rows live in
        ``<tbody><tr><td>ip</td><td>port</td>...`` (first two cells per row).
    :return: list of ``(ip, port)`` string tuples, in page order.
    """
    soup = BeautifulSoup(page_text, 'lxml')
    proxies = []
    for tr in soup.select('tbody > tr'):
        tds = tr.find_all('td')
        if len(tds) < 2:
            # Skip malformed/short rows instead of raising IndexError.
            continue
        # get_text(strip=True) also handles cells with nested tags,
        # where Tag.string would return None.
        proxies.append((tds[0].get_text(strip=True), tds[1].get_text(strip=True)))
    return proxies


def fetch_page(page):
    """Download page *page* (1-based) of the free-proxy list and return its HTML.

    :raises requests.HTTPError: on a non-2xx response (e.g. anti-bot block),
        instead of silently parsing an error page.
    """
    # NOTE(review): the bare /free URL shows page 1 of the "inha" list;
    # pagination uses /free/inha/<n>/ — confirm if the site changes routing.
    url = f'https://www.kuaidaili.com/free/inha/{page}/'
    resp = requests.get(url=url, headers=HEADERS, timeout=10)
    resp.raise_for_status()
    return resp.text


def main(pages=5, out_path='proxies.txt'):
    """Scrape the first *pages* pages and store ``ip:port`` lines in *out_path*.

    Also prints each ``ip port`` pair, matching the original script's output.
    """
    with open(out_path, 'w', encoding='utf-8') as fp:
        for page in range(1, pages + 1):
            for ip, port in parse_proxies(fetch_page(page)):
                print(ip, port)
                fp.write(f'{ip}:{port}\n')
            # Be polite between page fetches to avoid getting rate-limited.
            time.sleep(1)


if __name__ == '__main__':
    main()